# -*- coding: UTF-8 -*-
import scrapy
from imooc.items import ImoocItem

class ImoocSpider(scrapy.Spider):
    """Crawl the imooc course list and emit one ``ImoocItem`` per course card.

    The crawl starts at the course list page and keeps following the
    "next page" link until a page has no such link.
    """

    name = 'imooc'
    allowed_domains = []
    start_urls = [
        'http://www.imooc.com/course/list'
    ]

    def parse(self, response):
        """Extract course items from a list page and queue the next page.

        Args:
            response: The Scrapy response for a course list page.

        Yields:
            One ``ImoocItem`` per ``.course-card-container`` element, then a
            ``scrapy.Request`` for the next page when a link to it is found.
        """
        for sel in response.css('.course-card-container'):
            item = ImoocItem()
            # extract_first() returns None when nothing matches; default to ''
            # so a card missing one sub-element does not abort the whole page
            # (and with it the pagination request below).
            item['title'] = sel.css(
                'a > div.course-card-content > h3.course-card-name::text'
            ).extract_first(default='').strip()
            item['link'] = sel.xpath('a/@href').extract_first(default='').strip()
            item['desc'] = sel.css(
                'a > div.course-card-content > p::text'
            ).extract_first(default='').strip()
            item['category'] = sel.css(
                'a > .course-card-top.cart-color > span::text'
            ).extract_first(default='').strip()
            item['diff'] = sel.css(
                'a > div.course-card-content > div.course-card-bottom'
                ' > div.course-card-info::text'
            ).extract_first(default='').strip()
            yield item

        # Pagination: look up the href of the anchor whose text contains the
        # "next page" label.
        next_href = response.xpath(
            "//a[contains(text(),'%s')]/@href" % (u'下一页')
        ).extract_first()
        if next_href:
            # urljoin resolves relative, root-relative and absolute hrefs
            # against the current page URL, unlike plain string concatenation.
            yield scrapy.Request(response.urljoin(next_href), callback=self.parse)